-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Remove loop region in optimizeForVFAndUF. #108378
[VPlan] Remove loop region in optimizeForVFAndUF. #108378
Conversation
@llvm/pr-subscribers-backend-systemz @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesUpdate optimizeForVFAndUF to completely remove the vector loop region when possible. At the moment, we cannot remove the region if it contains
Both cases can be addressed by more explicit modeling. The patch also includes a number of updates to allow executing VPlans without a vector loop region which can be split off. Patch is 65.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108378.diff 13 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3b6b154b9660cf..81e6d893400f40 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2392,7 +2392,10 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
AC->registerAssumption(II);
// End if-block.
- bool IfPredicateInstr = RepRecipe->getParent()->getParent()->isReplicator();
+ bool IfPredicateInstr =
+ RepRecipe->getParent()->getParent()
+ ? RepRecipe->getParent()->getParent()->isReplicator()
+ : false;
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
@@ -2954,6 +2957,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
for (PHINode &PN : Exit->phis())
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
+ if (!isa<VPRegionBlock>(State.Plan->getEntry()->getSingleSuccessor()))
+ return;
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
@@ -7598,8 +7603,8 @@ LoopVectorizationPlanner::executePlan(
// 2.5 Collect reduction resume values.
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
- auto *ExitVPBB =
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
+ auto *ExitVPBB = cast<VPBasicBlock>(
+ BestVPlan.getEntry()->getSingleSuccessor()->getSingleSuccessor());
for (VPRecipeBase &R : *ExitVPBB) {
createAndCollectMergePhiForReduction(
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
@@ -7615,24 +7620,26 @@ LoopVectorizationPlanner::executePlan(
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized});
- VPBasicBlock *HeaderVPBB =
- BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
- Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
- if (VectorizedLoopID)
- L->setLoopID(*VectorizedLoopID);
- else {
- // Keep all loop hints from the original loop on the vector loop (we'll
- // replace the vectorizer-specific hints below).
- if (MDNode *LID = OrigLoop->getLoopID())
- L->setLoopID(LID);
-
- LoopVectorizeHints Hints(L, true, *ORE);
- Hints.setAlreadyVectorized();
+ if (auto *R =
+ dyn_cast<VPRegionBlock>(BestVPlan.getEntry()->getSingleSuccessor())) {
+ VPBasicBlock *HeaderVPBB = R->getEntryBasicBlock();
+ Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
+ if (VectorizedLoopID)
+ L->setLoopID(*VectorizedLoopID);
+ else {
+ // Keep all loop hints from the original loop on the vector loop (we'll
+ // replace the vectorizer-specific hints below).
+ if (MDNode *LID = OrigLoop->getLoopID())
+ L->setLoopID(LID);
+
+ LoopVectorizeHints Hints(L, true, *ORE);
+ Hints.setAlreadyVectorized();
+ }
+ TargetTransformInfo::UnrollingPreferences UP;
+ TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
+ if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
+ addRuntimeUnrollDisableMetaData(L);
}
- TargetTransformInfo::UnrollingPreferences UP;
- TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
- if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
- addRuntimeUnrollDisableMetaData(L);
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
@@ -9468,7 +9475,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
Kind, cast_if_present<BinaryOperator>(FPBinOp));
DerivedIV->setName("offset.idx");
- assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
+ assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
+ "IV didn't need transforming?");
State.set(this, DerivedIV, VPIteration(0, 0));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 276e90c7670a44..db7a6c8ea7e1f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -226,8 +226,7 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
InnerLoopVectorizer *ILV, VPlan *Plan,
LLVMContext &Ctx)
: VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
- LVer(nullptr),
- TypeAnalysis(Plan->getCanonicalIV()->getScalarType(), Ctx) {}
+ LVer(nullptr), TypeAnalysis(IntegerType::get(Ctx, 64), Ctx) {}
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
if (Def->isLiveIn())
@@ -278,8 +277,8 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
// Place the code for broadcasting invariant variables in the new preheader.
IRBuilder<>::InsertPointGuard Guard(Builder);
if (SafeToHoist) {
- BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
- Plan->getVectorLoopRegion()->getSinglePredecessor())];
+ BasicBlock *LoopVectorPreHeader =
+ CFG.VPBB2IRBB[cast<VPBasicBlock>(Plan->getEntry())];
if (LoopVectorPreHeader)
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
}
@@ -934,7 +933,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
- assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
+ // assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
if (VF.getNumUsers()) {
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
VF.setUnderlyingValue(RuntimeVF);
@@ -1005,8 +1004,13 @@ void VPlan::execute(VPTransformState *State) {
// skeleton creation, so we can only create the VPIRBasicBlocks now during
// VPlan execution rather than earlier during VPlan construction.
BasicBlock *MiddleBB = State->CFG.ExitBB;
- VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ VPBlockBase *Leaf = nullptr;
+ for (VPBlockBase *VPB : vp_depth_first_shallow(getEntry()))
+ if (VPB->getNumSuccessors() == 0) {
+ Leaf = VPB;
+ break;
+ }
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor());
// Find the VPBB for the scalar preheader, relying on the current structure
// when creating the middle block and its successrs: if there's a single
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1034,64 +1038,66 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Block->execute(State);
- VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
- BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
- // Fix the latch value of canonical, reduction and first-order recurrences
- // phis in the vector loop.
- VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &R : Header->phis()) {
- // Skip phi-like recipes that generate their backedege values themselves.
- if (isa<VPWidenPHIRecipe>(&R))
- continue;
-
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
- Phi = cast<PHINode>(GEP->getPointerOperand());
- }
-
- Phi->setIncomingBlock(1, VectorLatchBB);
+ if (auto *LoopRegion =
+ dyn_cast<VPRegionBlock>(getEntry()->getSingleSuccessor())) {
+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
+
+ // Fix the latch value of canonical, reduction and first-order recurrences
+ // phis in the vector loop.
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+ for (VPRecipeBase &R : Header->phis()) {
+ // Skip phi-like recipes that generate their backedege values themselves.
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
- continue;
- }
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+ "recipe generating only scalars should have been replaced");
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+ continue;
+ }
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
- // For canonical IV, first-order recurrences and in-order reduction phis,
- // only a single part is generated, which provides the last part from the
- // previous iteration. For non-ordered reductions all UF parts are
- // generated.
- bool SinglePartNeeded =
- isa<VPCanonicalIVPHIRecipe>(PhiR) ||
- isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
- bool NeedsScalar =
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
- unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
-
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *Phi = State->get(PhiR, Part, NeedsScalar);
- Value *Val =
- State->get(PhiR->getBackedgeValue(),
- SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
- cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
+ // For canonical IV, first-order recurrences and in-order reduction phis,
+ // only a single part is generated, which provides the last part from the
+ // previous iteration. For non-ordered reductions all UF parts are
+ // generated.
+ bool SinglePartNeeded =
+ isa<VPCanonicalIVPHIRecipe>(PhiR) ||
+ isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
+ bool NeedsScalar =
+ isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+ unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
+
+ for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
+ Value *Phi = State->get(PhiR, Part, NeedsScalar);
+ Value *Val =
+ State->get(PhiR->getBackedgeValue(),
+ SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
+ cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
+ }
}
}
-
State->CFG.DTU.flush();
assert(State->CFG.DTU.getDomTree().verify(
DominatorTree::VerificationLevel::Fast) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 64242e43c56bc8..7b6fb2f31f2d18 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3311,6 +3311,7 @@ class VPRegionBlock : public VPBlockBase {
assert(!isReplicator() && "should only get pre-header of loop regions");
return getSinglePredecessor()->getExitingBasicBlock();
}
+ void clearEntry() { Entry = nullptr; }
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b722ec34ee6fb6..83726b54fea107 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -702,16 +702,46 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
!SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
return;
- LLVMContext &Ctx = SE.getContext();
- auto *BOC =
- new VPInstruction(VPInstruction::BranchOnCond,
- {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});
-
SmallVector<VPValue *> PossiblyDead(Term->operands());
Term->eraseFromParent();
+ VPBasicBlock *Header =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getEntry());
+ if (all_of(Header->phis(), [](VPRecipeBase &R) {
+ return !isa<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>(&R);
+ })) {
+ for (VPRecipeBase &R : make_early_inc_range(Header->phis())) {
+ auto *P = cast<VPHeaderPHIRecipe>(&R);
+ P->replaceAllUsesWith(P->getStartValue());
+ P->eraseFromParent();
+ }
+
+ VPBlockBase *Preheader = Plan.getVectorLoopRegion()->getSinglePredecessor();
+ auto HeaderSuccs = to_vector(Header->getSuccessors());
+ VPBasicBlock *Exiting =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getExiting());
+
+ auto *LoopRegion = Plan.getVectorLoopRegion();
+ VPBlockBase *Middle = LoopRegion->getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(Preheader, LoopRegion);
+ VPBlockUtils::disconnectBlocks(LoopRegion, Middle);
+
+ Header->setParent(nullptr);
+ Exiting->setParent(nullptr);
+ VPBlockUtils::connectBlocks(Preheader, Header);
+
+ VPBlockUtils::connectBlocks(Exiting, Middle);
+ LoopRegion->clearEntry();
+ delete LoopRegion;
+ } else {
+ LLVMContext &Ctx = SE.getContext();
+ auto *BOC =
+ new VPInstruction(VPInstruction::BranchOnCond,
+ {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});
+
+ ExitingVPBB->appendRecipe(BOC);
+ }
for (VPValue *Op : PossiblyDead)
recursivelyDeleteDeadRecipes(Op);
- ExitingVPBB->appendRecipe(BOC);
Plan.setVF(BestVF);
Plan.setUF(BestUF);
// TODO: Further simplifications are possible
@@ -720,7 +750,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
}
/// Sink users of \p FOR after the recipe defining the previous value \p
-/// Previous of the recurrence. \returns true if all users of \p FOR could be
+// Previous of the recurrence. \returns true if all users of \p FOR could be
/// re-arranged as needed or false if it is not possible.
static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index f90524fde79654..1b3dabef5333a6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -81,17 +81,13 @@ define void @powi_call(ptr %P) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[WIDE_LOAD]], i32 3)
-; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TMP2]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -105,7 +101,7 @@ define void @powi_call(ptr %P) {
; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -236,6 +232,5 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index ec50b0cac03829..13ebfa82f1da62 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -49,28 +49,25 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 3)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 3)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
+; CHECK-NEXT: [[...
[truncated]
|
6d1cc96
to
dddcde5
Compare
This now depends #110004 and contains changes from it |
Use VPInstruction::ResumePhi to create phi nodes for reduction resume values. This allows simplifying createAndCollectMergePhiForReduction to only collect reduction resume phis when vectorizing epilogue loops and adding extra incoming edges from the main vector loop.
dddcde5
to
5f8fabe
Compare
if (VectorizedLoopID) | ||
L->setLoopID(*VectorizedLoopID); | ||
else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (VectorizedLoopID) | |
L->setLoopID(*VectorizedLoopID); | |
else { | |
if (VectorizedLoopID) { | |
L->setLoopID(*VectorizedLoopID); | |
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks!
|
||
VPBlockBase *Preheader = Plan.getVectorLoopRegion()->getSinglePredecessor(); | ||
auto HeaderSuccs = to_vector(Header->getSuccessors()); | ||
VPBasicBlock *Exiting = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBasicBlock *Exiting = | |
auto *Exiting = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
SmallVector<VPValue *> PossiblyDead(Term->operands()); | ||
Term->eraseFromParent(); | ||
VPBasicBlock *Header = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBasicBlock *Header = | |
auto *Header = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
@@ -714,7 +744,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, | |||
} | |||
|
|||
/// Sink users of \p FOR after the recipe defining the previous value \p | |||
/// Previous of the recurrence. \returns true if all users of \p FOR could be | |||
// Previous of the recurrence. \returns true if all users of \p FOR could be |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Previous of the recurrence. \returns true if all users of \p FOR could be | |
/// Previous of the recurrence. \returns true if all users of \p FOR could be |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed change, thanks!
…ion-instead-of-using-branch-on-cond-true
…ion-instead-of-using-branch-on-cond-true
…ion-instead-of-using-branch-on-cond-true
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rebased after landing #110004
ping :)
if (VectorizedLoopID) | ||
L->setLoopID(*VectorizedLoopID); | ||
else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks!
SmallVector<VPValue *> PossiblyDead(Term->operands()); | ||
Term->eraseFromParent(); | ||
VPBasicBlock *Header = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
|
||
VPBlockBase *Preheader = Plan.getVectorLoopRegion()->getSinglePredecessor(); | ||
auto HeaderSuccs = to_vector(Header->getSuccessors()); | ||
VPBasicBlock *Exiting = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
@@ -714,7 +744,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, | |||
} | |||
|
|||
/// Sink users of \p FOR after the recipe defining the previous value \p | |||
/// Previous of the recurrence. \returns true if all users of \p FOR could be | |||
// Previous of the recurrence. \returns true if all users of \p FOR could be |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed change, thanks!
…ion-instead-of-using-branch-on-cond-true
ping :) |
if (isa<VPWidenPointerInductionRecipe>(&R) || | ||
isa<VPWidenIntOrFpInductionRecipe>(&R)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (isa<VPWidenPointerInductionRecipe>(&R) || | |
isa<VPWidenIntOrFpInductionRecipe>(&R)) { | |
if (isa<VPWidenPointerInductionRecipe, VPWidenIntOrFpInductionRecipe>(&R)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
(isa<VPReductionPHIRecipe>(PhiR) && | ||
cast<VPReductionPHIRecipe>(PhiR)->isInLoop()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replace isa+cast by dyn_cast?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated, thanks!
if (all_of(Header->phis(), [](VPRecipeBase &R) { | ||
return !isa<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>(&R); | ||
})) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (all_of(Header->phis(), [](VPRecipeBase &R) { | |
return !isa<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>(&R); | |
})) { | |
if (none_of(Header->phis(), IsaPred<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, updated and reordered conditions
LoopRegion->clearEntry(); | ||
delete LoopRegion; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Potentially unsafe, may leak if the programmer forgets about it. Maybe, use Destructor or something like RAII?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if there is a nice way to do so currently. The whole replace-and-erase logic in the block needs to happen atomically at the moment. To avoid having to do manual delete's, VPlan could track all blocks added to it and delete them all when the VPlan is deleted (at the moment it only deletes reachable blocks). But that would require some extra logic to make sure all blocks are added properly.
…ion-instead-of-using-branch-on-cond-true
✅ With the latest revision this PR passed the C/C++ code formatter. |
…ion-instead-of-using-branch-on-cond-true
…ion-instead-of-using-branch-on-cond-true
@@ -2954,6 +2957,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |||
for (PHINode &PN : Exit->phis()) | |||
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); | |||
|
|||
if (!isa<VPRegionBlock>(State.Plan->getEntry()->getSingleSuccessor())) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!isa<VPRegionBlock>(State.Plan->getEntry()->getSingleSuccessor())) | |
if (!State.Plan->getVectorLoopRegion()) |
which should dyn_cast instead of cast.
@@ -2392,7 +2392,10 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, | |||
AC->registerAssumption(II); | |||
|
|||
// End if-block. | |||
bool IfPredicateInstr = RepRecipe->getParent()->getParent()->isReplicator(); | |||
bool IfPredicateInstr = | |||
RepRecipe->getParent()->getParent() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Time to introduce getEnclosingRegion() to retrieve a VPRegionBlock grandparent?
auto *ExitVPBB = | ||
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); | ||
auto *ExitVPBB = cast<VPBasicBlock>( | ||
BestVPlan.getEntry()->getSingleSuccessor()->getSingleSuccessor()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Time for VPlan to provide its Exit VP[IR]BB?
VPRegionBlock *Parent = RepRecipe->getParent()->getParent(); | ||
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false; | ||
assert((Parent || all_of(RepRecipe->operands(), | ||
[](VPValue *Op) { | ||
return Op->isDefinedOutsideLoopRegions(); | ||
})) && | ||
"Expected a recipe is either within a region or all of its operands " | ||
"are defined outside the vectorized region."); | ||
assert( | ||
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() || | ||
all_of(RepRecipe->operands(), | ||
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) && | ||
"Expected a recipe is either within a region or all of its operands " | ||
"are defined outside the vectorized region."); | ||
if (IfPredicateInstr) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're actually interested here in asking if (RepRecipe->getParent()->getEnclosingReplicateRegion())
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes but such a helper needs to be added. Will check if there are other users that could benefit, thanks
if (!State.Plan->getVectorLoopRegion()) | ||
return; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment why this is placed here, i.e., why all above should work even if vector loop region was removed, and all below should not.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added thanks
simplifyRecipes(Plan, CanIVTy); | ||
} | ||
|
||
VPlanTransforms::removeDeadRecipes(Plan); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth committing this replacement of recursivelyDeleteDeadRecipes() separately?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks
VPInstruction::BranchOnCond, | ||
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc()); | ||
ExitingVPBB->appendRecipe(BOC); | ||
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Say something about what is about to happen now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks
ExitingVPBB->appendRecipe(BOC); | ||
} else { | ||
for (VPRecipeBase &R : make_early_inc_range(Header->phis())) { | ||
auto *P = cast<VPHeaderPHIRecipe>(&R); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto *P = cast<VPHeaderPHIRecipe>(&R); | |
auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(&R); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
VPBlockBase *Preheader = Plan.getVectorPreheader(); | ||
VPBlockBase *Middle = Plan.getMiddleBlock(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better take VectorRegion's single predecessor and single successor?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
|
||
for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry())) { | ||
if (isa<VPRegionBlock>(B)) | ||
B->setParent(nullptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about basic block B's, between replicate regions - other than Header and ExitingVPBB?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to clear parent for all blocks and removed setting just header and exitingVPBB above, thanks
Split off from #108378. This ensures that the logic works even if now vector region exits.
…ion-instead-of-using-branch-on-cond-true
…ion-instead-of-using-branch-on-cond-true
…ion-instead-of-using-branch-on-cond-true
Remove logic to re-use the previous basic block for the vector pre header from VPBasicBlock::execute. The preheader is now modeled as VPIRBasicBlock, so the code is no longer needed. Split off from #108378.
…ion-instead-of-using-branch-on-cond-true
…ion-instead-of-using-branch-on-cond-true
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/13166 Here is the relevant piece of the build log for the reference
|
} | ||
|
||
bool VPValue::isDefinedOutsideLoopRegions() const { | ||
return !isDefinedInsideLoopRegions(this); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
post-commit nit:
} | |
} | |
…20918) This patch changes the way blocks are managed by VPlan. Previously all blocks reachable from entry would be cleaned up when a VPlan is destroyed. With this patch, each VPlan keeps track of blocks created for it in a list and this list is then used to delete all blocks in the list when the VPlan is destroyed. To do so, block creation is funneled through helpers in directly in VPlan. The main advantage of doing so is it simplifies CFG transformations, as those do not have to take care of deleting any blocks, just adjusting the CFG. This helps to simplify llvm/llvm-project#108378 and llvm/llvm-project#106748. This also simplifies handling of 'immutable' blocks a VPlan holds references to, which at the moment only include the scalar header block. PR: llvm/llvm-project#120918
…use (NFC) Move definitions to allow easy reuse in llvm/llvm-project#108378.
…xecute (NFCI). This ensures that all blocks created during VPlan execution are properly added to an enclosing loop, if present. Split off from llvm/llvm-project#108378 and also needed once more of the skeleton blocks are created directly via VPlan. This also allows removing the custom logic for early-exit loop vectorization added as part of llvm/llvm-project#117008.
Add missing test coverage of loops where the vector loop region can be removed that include replicate recipes as well as nested loops. Extra test coverage for llvm/llvm-project#108378.
Split off from llvm/llvm-project#108378. This ensures that the logic works even if now vector region exits.
… (NFC). Remove logic to re-use the previous basic block for the vector pre header from VPBasicBlock::execute. The preheader is now modeled as VPIRBasicBlock, so the code is no longer needed. Split off from llvm/llvm-project#108378.
Update optimizeForVFAndUF to completely remove the vector loop region when possible. At the moment, we cannot remove the region if it contains
Both cases can be addressed by more explicit modeling.
The patch also includes a number of updates to allow executing VPlans without a vector loop region which can be split off.
Depends on #110004